library(tidyverse)
library(moderndive)
library(infer)
# Read the metadata table from a CSV located relative to the working directory.
metadata <- read_csv("clean_metadata.csv")
Missing column names filled in: 'X1' [1]
Parsed with column specification:
cols(
  X1 = col_double(),
  id = col_double(),
  imdbId = col_character(),
  movieId = col_double(),
  tmdbId = col_double(),
  original_title = col_character(),
  title = col_character(),
  popularity = col_double(),
  release_date = col_date(format = ""),
  revenue = col_double(),
  runtime = col_double(),
  budget = col_double(),
  vote_average = col_double(),
  vote_count = col_double(),
  original_language = col_character()
)
# Drop X1, the placeholder name given to the CSV's unnamed first column.
metadata <- metadata %>%
  select(-X1)
metadata

# Browse the rows from the largest budget down.
metadata %>%
  arrange(desc(budget))

# Split release_date on "-" into year / month / day columns. No type
# conversion is requested, so `month` is used as a discrete variable by the
# grouping and plotting code that reads it.
metadata <- metadata %>%
  separate(col = release_date, into = c("year", "month", "day"), sep = "-")
metadata
# Median revenue per release month, sorted from highest to lowest.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
metadata %>%
  group_by(month) %>%
  summarize(median_revenue = median(revenue, na.rm = TRUE)) %>%
  arrange(desc(median_revenue))
NA
# Horizontal boxplots of revenue for each release month. Rows with revenue of
# 1.5e9 or more are removed before plotting.
metadata %>%
  filter(revenue < 1.5e+9) %>%
  ggplot(aes(month, revenue, fill = month)) +
  geom_boxplot(show.legend = FALSE) +
  coord_flip()

# Scatterplot of average vote against budget with two fitted curves: the
# default geom_smooth() fit and a straight lm fit drawn in tomato.
ggplot(metadata, aes(x = budget, y = vote_average)) +
  geom_point() +
  geom_smooth() +
  geom_smooth(method = "lm", color = "tomato") +
  labs(title = "Average Vote vs. Budget", x = "Budget", y = "Average Vote")

# First six rows when sorted by revenue, highest first.
metadata %>%
  arrange(desc(revenue)) %>%
  select(title, revenue, budget, year) %>%
  head()

# Among rows with a budget above 2e8, the first six when sorted by revenue,
# lowest first.
metadata %>%
  filter(budget > 2e+8) %>%
  arrange(revenue) %>%
  select(title, revenue, budget, year) %>%
  head()

# Correlation between revenue and budget.
metadata %>%
  get_correlation(revenue ~ budget, na.rm = TRUE)
# Average vote against popularity, keeping only popularity below 150.
# This plot has the default smoother only, with no lm line.
ggplot(filter(metadata, popularity < 150), aes(x = popularity, y = vote_average)) +
  geom_point() +
  geom_smooth()

# Revenue against average vote, with the default smoother and an lm line.
ggplot(metadata, aes(x = vote_average, y = revenue)) +
  geom_point() +
  geom_smooth() +
  geom_smooth(method = "lm", color = "tomato")

# Average vote against budget, with the default smoother and an lm line.
ggplot(metadata, aes(x = budget, y = vote_average)) +
  geom_point() +
  geom_smooth() +
  geom_smooth(method = "lm", color = "tomato")

# Average vote against popularity, keeping only popularity below 50.
ggplot(filter(metadata, popularity < 50), aes(x = popularity, y = vote_average)) +
  geom_point() +
  geom_smooth() +
  geom_smooth(method = "lm", color = "tomato")

# Browse the rows from the highest popularity down.
arrange(metadata, desc(popularity))

# Average vote against popularity over all rows.
ggplot(metadata, aes(x = popularity, y = vote_average)) +
  geom_point() +
  geom_smooth() +
  geom_smooth(method = "lm", color = "tomato")

# Horizontal boxplots of revenue for the language codes "ja", "it", "fr" and
# "en", restricted to revenue below 1e9. `%in%` replaces the chain of `==`
# comparisons; rows with a missing language are dropped either way.
metadata %>%
  filter(
    original_language %in% c("ja", "it", "fr", "en"),
    revenue < 1e+9
  ) %>%
  ggplot(aes(original_language, revenue, fill = original_language)) +
  geom_boxplot() +
  coord_flip()

# Browse the rows from the shortest runtime up.
metadata %>%
  arrange(runtime)

# Horizontal boxplots of revenue for each release month, restricted to
# revenue below 1e9.
metadata %>%
  filter(revenue < 1e+9) %>%
  ggplot(aes(month, revenue, fill = month)) +
  geom_boxplot(show.legend = FALSE) +
  coord_flip()

# Average vote against budget, restricted to budgets below 2e8.
metadata %>%
  filter(budget < 2e+8) %>%
  ggplot(aes(budget, vote_average)) +
  geom_point() +
  geom_smooth()

# Simple linear regression with vote_average as the outcome and budget as the
# single explanatory variable, followed by its coefficient table.
budget_vote_average_mod <- lm(vote_average ~ budget, data = metadata)

budget_vote_average_mod %>%
  get_regression_table()
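Fitted line: $\hat{y} = 6.46 + 0 \cdot x$, where $\hat{y}$ is the predicted vote_average and $x$ is budget. The slope is shown as 0 only because of rounding in the regression table; the correlation of about -0.09 indicates it is slightly negative.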
# Correlation between vote_average and budget. The result is kept so the
# squared value below is derived from it instead of from a retyped number.
budget_vote_cor <- metadata %>%
  get_correlation(vote_average ~ budget, na.rm = TRUE)
budget_vote_cor

# Squared correlation, taken from the `cor` column of the result above so it
# tracks the data if the input changes.
budget_vote_cor$cor^2
[1] 0.00805617
# Horizontal boxplots of average vote for each release month.
metadata %>%
  ggplot(aes(month, vote_average, fill = month)) +
  geom_boxplot(show.legend = FALSE) +
  coord_flip()

# NOTE(review): both axes map to popularity, so every point falls on the line
# y = x. Confirm whether a different variable was intended for one axis.
metadata %>%
  ggplot(aes(popularity, popularity)) +
  geom_point(show.legend = FALSE)

# Revenue against budget, with the default smoother and an lm line in tomato.
metadata %>%
  ggplot(aes(budget, revenue)) +
  geom_point(show.legend = FALSE) +
  geom_smooth() +
  geom_smooth(method = "lm", color = "tomato")

# Browse the rows from the highest popularity down.
metadata %>%
  arrange(desc(popularity))
# Correlation between revenue and budget. The result is kept so the squared
# value below is derived from it instead of from a retyped number.
budget_revenue_cor <- metadata %>%
  get_correlation(revenue ~ budget, na.rm = TRUE)
budget_revenue_cor

# Squared correlation, taken from the `cor` column of the result above so it
# tracks the data if the input changes.
budget_revenue_cor$cor^2
[1] 0.5028302
# Simple linear regression with revenue as the outcome and budget as the
# single explanatory variable, followed by its coefficient table.
budget_revenue_mod <- lm(revenue ~ budget, data = metadata)

budget_revenue_mod %>%
  get_regression_table()
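Fitted line: $\hat{y} = 2998573.384 + 3.007 \cdot x$, where $\hat{y}$ is the predicted revenue and $x$ is budget; each additional unit of budget is associated with about 3.007 more units of revenue on average.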
LS0tCnRpdGxlOiAiTW92aWVzIFByb2plY3QiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShtb2Rlcm5kaXZlKQpsaWJyYXJ5KGluZmVyKQptZXRhZGF0YSA9IHJlYWRfY3N2KCJjbGVhbl9tZXRhZGF0YS5jc3YiKQptZXRhZGF0YSAgPSBtZXRhZGF0YSAlPiUgc2VsZWN0KC1YMSkKbWV0YWRhdGEKYGBgCgpgYGB7cn0KbWV0YWRhdGEgJT4lCiAgYXJyYW5nZShkZXNjKGJ1ZGdldCkpCmBgYAoKCmBgYHtyfQptZXRhZGF0YSA9IG1ldGFkYXRhICU+JQogIHNlcGFyYXRlKGNvbCA9IHJlbGVhc2VfZGF0ZSwgaW50byA9IGMoInllYXIiLCAibW9udGgiLCAiZGF5IiksIHNlcCA9ICctJykKCm1ldGFkYXRhCmBgYAoKCmBgYHtyfQptZXRhZGF0YSAlPiUgCiAgZ3JvdXBfYnkobW9udGgpICU+JQogIHN1bW1hcml6ZShtZWRpYW5fcmV2ZW51ZSA9IG1lZGlhbihyZXZlbnVlLCBuYS5ybSA9IFQpKSAlPiUKICBhcnJhbmdlKGRlc2MobWVkaWFuX3JldmVudWUpKQogIApgYGAKCmBgYHtyfQptZXRhZGF0YSAlPiUgZmlsdGVyKHJldmVudWUgPCAxLjVlKzkpICU+JQogIGdncGxvdChhZXMobW9udGgsIHJldmVudWUsIGZpbGwgPSBtb250aCkpICsgZ2VvbV9ib3hwbG90KHNob3cubGVnZW5kID0gRikgKyBjb29yZF9mbGlwKCkKYGBgCgoKCmBgYHtyfQptZXRhZGF0YSAlPiUKICBnZ3Bsb3QoYWVzKGJ1ZGdldCwgdm90ZV9hdmVyYWdlKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpICsgZ2VvbV9zbW9vdGgobWV0aG9kID0gJ2xtJywgY29sb3IgPSAndG9tYXRvJykgKyBnZ3RpdGxlKCJBdmVyYWdlIFZvdGUgdnMuIEJ1ZGdldCIpICsgeGxhYigiQnVkZ2V0IikgKyB5bGFiKCJBdmVyYWdlIFZvdGUiKQpgYGAKYGBge3J9Cm1ldGFkYXRhICU+JQogIGFycmFuZ2UoZGVzYyhyZXZlbnVlKSkgJT4lCiAgc2VsZWN0KHRpdGxlLCByZXZlbnVlLCBidWRnZXQsIHllYXIpICU+JQogIGhlYWQoKQpgYGAKCgpgYGB7cn0KbWV0YWRhdGEgJT4lCiAgZmlsdGVyKGJ1ZGdldCA+IDJlKzgpICU+JQogIGFycmFuZ2UoKHJldmVudWUpKSAlPiUKICBzZWxlY3QodGl0bGUsIHJldmVudWUsIGJ1ZGdldCwgeWVhcikgJT4lCiAgaGVhZCgpCmBgYAoKCmBgYHtyfQptZXRhZGF0YSAlPiUgCiAgICBnZXRfY29ycmVsYXRpb24ocmV2ZW51ZSB+IGJ1ZGdldCwgbmEucm0gPSBUKQpgYGAKCgoKYGBge3J9Cm1ldGFkYXRhICU+JSBmaWx0ZXIocG9wdWxhcml0eSA8IDE1MCkgJT4lCiAgZ2dwbG90KGFlcyhwb3B1bGFyaXR5LCB2b3RlX2F2ZXJhZ2UpKSArIGdlb21fcG9pbnQoKSArIGdlb21fc21vb3RoKCkKYGBgCgoKCmBgYHtyfQoKYGBgCgpgYGB7cn0KbWV0YWRhdGEgJT4lCiAgZ2dwbG90KGFlcyh2b3RlX2F2ZXJhZ2UsIHJldmVudWUpKSArIGdlb21fcG9pbnQoKSArIGdlb21fc21vb3RoKCkgKyBnZW9tX3Ntb290aChtZXRob2QgPSAnbG0nLCBjb2xvciA9ICd0b21hdG8nKQpgYGAKCgoKYGBge3J9Cm1ldGFkYXRh
ICU+JQogIGdncGxvdChhZXMoYnVkZ2V0LCB2b3RlX2F2ZXJhZ2UpKSArIGdlb21fcG9pbnQoKSArIGdlb21fc21vb3RoKCkgKyBnZW9tX3Ntb290aChtZXRob2QgPSAnbG0nLCBjb2xvciA9ICd0b21hdG8nKQpgYGAKCgoKCgpgYGB7cn0KbWV0YWRhdGEgJT4lIGZpbHRlcihwb3B1bGFyaXR5IDwgNTApICU+JQogIGdncGxvdChhZXMocG9wdWxhcml0eSwgdm90ZV9hdmVyYWdlKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpICsgZ2VvbV9zbW9vdGgobWV0aG9kID0gJ2xtJywgY29sb3IgPSAndG9tYXRvJykKYGBgCgpgYGB7cn0KbWV0YWRhdGEgJT4lIAogIGFycmFuZ2UoZGVzYyhwb3B1bGFyaXR5KSkKYGBgCgoKCmBgYHtyfQptZXRhZGF0YSAlPiUKICBnZ3Bsb3QoYWVzKHBvcHVsYXJpdHksIHZvdGVfYXZlcmFnZSkpICsgZ2VvbV9wb2ludCgpICsgZ2VvbV9zbW9vdGgoKSArIGdlb21fc21vb3RoKG1ldGhvZCA9ICdsbScsIGNvbG9yID0gJ3RvbWF0bycpCmBgYAoKCmBgYHtyfQptZXRhZGF0YSAlPiUgZmlsdGVyKG9yaWdpbmFsX2xhbmd1YWdlID09ICJqYSIgfCBvcmlnaW5hbF9sYW5ndWFnZSA9PSAiaXQiIHwgb3JpZ2luYWxfbGFuZ3VhZ2UgPT0gImZyIiB8b3JpZ2luYWxfbGFuZ3VhZ2UgPT0gImVuIiApICU+JSBmaWx0ZXIocmV2ZW51ZSA8IDFlKzkpICU+JSAKICBnZ3Bsb3QoYWVzKG9yaWdpbmFsX2xhbmd1YWdlLCByZXZlbnVlLCBmaWxsID0gb3JpZ2luYWxfbGFuZ3VhZ2UpKSArIGdlb21fYm94cGxvdCgpICsgY29vcmRfZmxpcCgpCgpgYGAKCgpgYGB7cn0KbWV0YWRhdGEgJT4lCiAgYXJyYW5nZSgocnVudGltZSkpCmBgYAoKCmBgYHtyfQptZXRhZGF0YSAlPiUgZmlsdGVyKHJldmVudWUgPCAxZSs5KSAlPiUKICBnZ3Bsb3QoYWVzKG1vbnRoLCByZXZlbnVlLCBmaWxsID0gbW9udGgpKSArIGdlb21fYm94cGxvdChzaG93LmxlZ2VuZCA9IEYpICsgY29vcmRfZmxpcCgpCmBgYAoKCgpgYGB7cn0KbWV0YWRhdGEgJT4lIGZpbHRlcihidWRnZXQgPCAyZSs4KSAlPiUKICBnZ3Bsb3QoYWVzKGJ1ZGdldCwgdm90ZV9hdmVyYWdlKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpCmBgYAoKYGBge3J9CmJ1ZGdldF92b3RlX2F2ZXJhZ2VfbW9kID0gbG0odm90ZV9hdmVyYWdlIH4gYnVkZ2V0LCBkYXRhID0gbWV0YWRhdGEpCgpidWRnZXRfdm90ZV9hdmVyYWdlX21vZCAlPiUKICBnZXRfcmVncmVzc2lvbl90YWJsZSgpCmBgYAoKeWhhdCA9IDYuNDYgKyAweAoKCmBgYHtyfQptZXRhZGF0YSAlPiUgCiAgICBnZXRfY29ycmVsYXRpb24odm90ZV9hdmVyYWdlIH4gYnVkZ2V0LCBuYS5ybSA9IFQpCmBgYAoKCgoKYGBge3J9Ci0wLjA4OTc1NjE3ICogLTAuMDg5NzU2MTcKYGBgCgoKYGBge3J9Cm1ldGFkYXRhICU+JQogIGdncGxvdChhZXMobW9udGgsIHZvdGVfYXZlcmFnZSwgZmlsbCA9IG1vbnRoKSkgKyBnZW9tX2JveHBsb3Qoc2hvdy5sZWdlbmQgPSBGKSArIGNvb3JkX2ZsaXAoKQpgYGAKCgoKYGBge3J9Cm1ldGFkYXRh
ICU+JQogIGdncGxvdChhZXMocG9wdWxhcml0eSwgcG9wdWxhcml0eSkpICsgZ2VvbV9wb2ludChzaG93LmxlZ2VuZCA9IEYpCmBgYAoKCmBgYHtyfQptZXRhZGF0YSAlPiUKICBnZ3Bsb3QoYWVzKGJ1ZGdldCwgcmV2ZW51ZSkpICsgZ2VvbV9wb2ludChzaG93LmxlZ2VuZCA9IEYpICsgZ2VvbV9zbW9vdGgoKSArIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIsIGNvbG9yID0gInRvbWF0byIpCmBgYAoKCmBgYHtyfQptZXRhZGF0YSAlPiUKICBhcnJhbmdlKGRlc2MocG9wdWxhcml0eSkpCmBgYAoKYGBge3J9Cm1ldGFkYXRhICU+JSAKICAgIGdldF9jb3JyZWxhdGlvbihyZXZlbnVlIH4gYnVkZ2V0LCBuYS5ybSA9IFQpCmBgYAoKCmBgYHtyfQowLjcwOTEwNTIgKiAwLjcwOTEwNTIKYGBgCgpgYGB7cn0KYnVkZ2V0X3JldmVudWVfbW9kID0gbG0ocmV2ZW51ZSB+IGJ1ZGdldCwgZGF0YSA9IG1ldGFkYXRhKQoKYnVkZ2V0X3JldmVudWVfbW9kICU+JQogIGdldF9yZWdyZXNzaW9uX3RhYmxlKCkKYGBgCgp5aGF0ID0gMjk5ODU3My4zODQgKyAzLjAwN3g=